SGI Freeware 2002 November

home *** CD-ROM | disk | FTP | other *** search

/ SGI Freeware 2002 November / SGI Freeware 2002 November - Disc 2.iso / dist / fw_ispell.idb / usr / freeware / src / ispell / patches.z / patches

Wrap

Text File | 2000-01-25 | 22KB | 740 lines

--- ./languages/english/Makefile Thu Oct 12 12:04:08 1995 +++ ../ispell-3.1/./languages/english/Makefile Tue Jan 4 08:21:33 2000 @@ -436,8 +436,8 @@ set -x; \ PATH=$(PATHADDER):$$PATH; \ export PATH; \ - munchlist -v -l $(AFFIXES) $$dicts \ - > english.med+ \ + munchlist -l $(AFFIXES) $$dicts \ + 1> english.med+ 2> /dev/null \ || rm -f english.med+ test -s english.med+ \ || (echo 'error: zero-length dictionary generated'; \ --- ./correct.c Thu Oct 12 12:04:06 1995 +++ ../ispell-3.1/./correct.c Tue Jan 4 08:16:24 2000 @@ -50,6 +50,13 @@ /* * $Log: correct.c,v $ + * + * HTML-entities added by Casper Maarbjerg, 1997/05/16 as listed in + * http://uts.cc.utexas.edu/~churchh/latin1.html + * + * Line added by Gerry Tierney to reset insidehtml flag for each new + * file in case a tag was left open by a previous file. 10/14/95 + * * Revision 1.59 1995/08/05 23:19:43 geoff * Fix a bug that caused offsets for long lines to be confused if the * line started with a quoting uparrow. @@ -233,6 +240,9 @@ int bufsize; int ch; + /* line added by Gerry Tierney */ + insidehtml = 0; + for (bufno = 0; bufno < contextsize; bufno++) contextbufs[bufno][0] = '\0'; @@ -295,7 +305,11 @@ char * start_l2; char * begintoken; +#ifdef HTSPECIAL + begintoken = ctok_start == NULL ? contextbufs[0] : ctok_start; +#else begintoken = *curchar - strlen (ctok); +#endif if (icharlen (itok) <= minword) return; /* Accept very short words */ @@ -374,7 +388,11 @@ if (start_l2 < contextbufs[0]) start_l2 = contextbufs[0]; } +#ifdef HTSPECIAL + show_line (start_l2, begintoken, *curchar - begintoken ); +#else show_line (start_l2, begintoken, (int) strlen (ctok)); +#endif if (minimenusize != 0) { @@ -594,6 +612,16 @@ ichar = SET_SIZE + laststringch; else ichar = chartoichar (ch); +#ifdef HTSPECIAL + if (htmlflag == 1 && ch == '&' && !vflag && len == 1) + { + ch = html_ent(cp); + if (output) + (void) putchar (ch); + return 1; + } + else +#endif if (!vflag && iswordch (ichar) && len == 1) { if (output) @@ -1604,6 +1632,11 @@ if (**cc == '\0') break; if (!aflag && !lflag) +#ifdef HTSPECIAL + if (htmlflag == 1 && (unsigned char)**cc >= FIRST_ISO) + fprintf(outfile, iso_ent[(unsigned char) **cc - FIRST_ISO]); + else +#endif (void) putc (**cc, outfile); (*cc)++; } --- ./ispell.c Thu Oct 12 12:04:07 1995 +++ ../ispell-3.1/./ispell.c Tue Jan 4 08:16:24 2000 @@ -49,6 +49,12 @@ /* * $Log: ispell.c,v $ + * + * Modifications made by Gerry Tierney <gtierney@nova.ucd.ie> to + * allow checking of html code. Adds -h switch and checking for + * html files by .html or .htm extension. + * 14th of October 1995 + * * Revision 1.133 1995/10/11 04:30:29 geoff * Get rid of an unused variable. * @@ -298,7 +304,9 @@ * ABCDEFGHIJKLMNOPQRSTUVWXYZ 0123456789 * ^^^^ ^^^ ^ ^^ ^^ * abcdefghijklmnopqrstuvwxyz - * ^^^^^^ ^^^ ^ ^^ ^^^ + * ^^^^^^ ^ ^^^ ^ ^^ ^^^ + * + * -h flag used by Gerry Tierney for html-mode */ arglen = strlen (*argv); switch ((*argv)[1]) @@ -438,6 +446,9 @@ (void) printf ("\tNO8BIT\n"); #else /* NO8BIT */ (void) printf ("\t!NO8BIT (8BIT)\n"); +#ifdef HTSPECIAL + (void) printf ("\tHTSPECIAL \"(ISO-HTML mode)\"\n"); +#endif #endif /* NO8BIT */ (void) printf ("\tNRSPECIAL = \"%s\"\n", NRSPECIAL); (void) printf ("\tOLDPAFF = \"%s\"\n", OLDPAFF); @@ -488,6 +499,7 @@ if (arglen > 2) usage (); tflag = 0; /* nroff/troff mode */ + htmlflag = -1; /* non-html mode */ deftflag = 0; if (preftype == NULL) preftype = "nroff"; @@ -496,10 +508,19 @@ if (arglen > 2) usage (); tflag = 1; + htmlflag = -1; /* non-html mode */ deftflag = 1; if (preftype == NULL) preftype = "tex"; break; + /* -h option to enable HTML-mode added by Gerry Tierney */ + case 'h': + if (arglen > 2) + usage (); + tflag = 0; /* non-TeX mode */ + deftflag = 0; + htmlflag = 1; /* Html-Mode */ + break; case 'T': /* Set preferred file type */ p = (*argv)+2; if (*p == '\0') @@ -810,7 +831,7 @@ if (tflag < 0) tflag = (cp = rindex (filename, '.')) != NULL && strcmp (cp, ".tex") == 0; - + if (prefstringchar < 0) { defdupchar = @@ -818,6 +839,13 @@ if (defdupchar < 0) defdupchar = 0; } + /* Modification by Gerry Tierney to set hmtl-mode + * based on file extension */ + if (htmlflag == 0) + htmlflag = + (cp = rindex (filename, '.')) != NULL && + ( strcmp (cp, ".html") == 0 || + strcmp (cp, ".htm") == 0); if ((infile = fopen (filename, "r")) == NULL) { --- ./ispell.h Thu Oct 12 12:04:08 1995 +++ ../ispell-3.1/./ispell.h Tue Jan 4 08:16:24 2000 @@ -42,6 +42,16 @@ /* * $Log: ispell.h,v $ + * + * Patch by Casper Maarbjerg, http://www.nyx.net/~cmaarbj/ + * 1997/05/19, for ISO HTML-entity conversion in html mode. + * Added variable ctok_start to hold the start of raw html word. + * changes wrapped in "#ifdef HTSPECIAL". + * + * Patch by Gerry Tierney <gtierney@nova.ucd.ie> + * 1995/10/14 + * Added variables htmlflag and insidehtml for use in html-mode + * * Revision 1.68 1995/03/06 02:42:41 geoff * Be vastly more paranoid about parenthesizing macro arguments. This * fixes a bug in defmt.c where a complex argument was passed to @@ -623,6 +633,26 @@ INIT (int deftflag, -1); /* NZ for TeX mode by default */ INIT (int tflag, DEFTEXFLAG); /* NZ for TeX mode in current file */ INIT (int prefstringchar, -1); /* Preferred string character type */ +/* The following two definitions added by + * Gerry Tierney <gtierney@nova.ucd.ie> + * 14th Oct 95 + */ +INIT (int htmlflag, 0); /* HTML-checking state. + * 1=enable html-mode, + * 0=enable html-mode based on filename, + * -1=disable html-mode */ +INIT (int insidehtml, 0); /* Flag to indicate that the current html + * tag has spanned more than one line */ +/* End of Gerry's Interference */ +#ifdef HTSPECIAL /* decode "&#;" for HTML-ISO characters */ +#ifdef NO8BIT +#error HTSPECIAL requires NO8BIT to be undefined ! +#endif +#define FIRST_ISO 160 /* First 8-bit code of valid HTML entities */ +extern char *iso_ent[]; /* HTML entities defined in defmt.c */ +extern int html_ent P ((char **in)); +INIT (char *ctok_start, NULL); /* Remember start of raw HTML word */ +#endif INIT (int terse, 0); /* NZ for "terse" mode */ --- ./defmt.c Thu Oct 12 12:04:06 1995 +++ ../ispell-3.1/./defmt.c Tue Jan 4 08:16:24 2000 @@ -54,6 +54,12 @@ /* * $Log: defmt.c,v $ + * ISO-character de-/en- coding in html mode added 1997/05/16 + * by Casper Maarbjerg, http://www.nyx.net/~cmaarbj/ + * + * html-mode code added by Gerry Tierney <gtierney@nova.ucd.ie> + * 14th of Oct '95 + * * Revision 1.41 1995/08/05 23:19:47 geoff * Get rid of an obsolete comment. Add recognition of documentclass and * usepackage for Latex2e support. @@ -140,6 +146,7 @@ static void TeX_open_paren P ((char ** bufp)); static void TeX_skip_check P ((char ** bufp)); static int TeX_strncmp P ((char * a, char * b, int n)); +char * htmlword P ((unsigned char *source)); #define ISTEXTERM(c) (((c) == TEXLEFTCURLY) || \ ((c) == TEXRIGHTCURLY) || \ @@ -160,6 +167,25 @@ static int save_math_mode; static char save_LaTeX_Mode; +static char *skiptag(buf, tagend, taglen) /* Skip past specific tag */ + char * buf; + char * tagend; + int taglen; + { + while(*buf) + { + if (*buf != *tagend && ++buf) + continue; + if (strncasecmp(buf, tagend, taglen) && ++buf) + continue; + buf += taglen; + insidehtml = 0; + break; + } + return(buf); + } + +/* parameters changed by Gerry Tierney to include the output file */ static char * skiptoword (bufp) /* Skip to beginning of a word */ char * bufp; { @@ -170,6 +196,82 @@ || (tflag && (math_mode & 1))) ) { + /* Start of modifications by Gerry Tierney */ + /* We first check for an end-quote character if we are checking + inside of an alt attribute. If we find one we ignore the + rest of the tag */ + if (insidehtml == -1 && *bufp == '\"') + { + insidehtml = 0; + while (*bufp != '>' && *bufp != '\0') + bufp++; + if (*bufp == '\0') + insidehtml = 1; + } + /* If we are checking a html file we want to ignore any + HTML tags. These should start with a '<' + and end with a '>' so we simply skip over anything + between these two symbols. If we reach the end of the line + before finding a matching '>' we set a flag 'insidehtml' */ + if (htmlflag == 1 && *bufp == '<') + { + /* Found start of html tag, if it is a script tag, + * skip until end of script */ + if (insidehtml == 2 || (strncasecmp(bufp,"<script", 7) == 0)) + { + insidehtml = 2; + bufp = skiptag(bufp, "</script>", 9); + } + /* It could also be a comment, containing a '>', so it + * seems safer to skip until the first end-of-comment.. */ + else if (insidehtml == 3 || (strncmp(bufp,"", 3); + } + else { + /* Found start of html tag - Skip to end of tag or EOL */ + while (*bufp != '>' && *bufp != '\0' && + strncasecmp(bufp,"alt=\"",5) != 0) + bufp++; + /* If we find an alt tag, we want to check its text */ + if (strncasecmp(bufp,"alt=\"",5) == 0) + { + insidehtml=-1; + bufp = bufp + 4; + } + else if (*bufp == '\0') + /* we've reached EOL without closing the tag */ + insidehtml = 1; + } + } +#ifndef HTSPECIAL + /* HTSPECIAL characters _NOT_ defined, so... + */ + /* Skip over quoted entities such as " + These all start with an ampersand and + end with a semi-colon. We do not need + to worry about them extending over more than one line */ + if (htmlflag == 1 && *bufp == '&') + { + while (*bufp && *bufp != ';' && *bufp != ' ') + bufp++; + } +#else + if (htmlflag == 1 && *bufp == '&') + { + char *cp2 = bufp; + + if (html_ent(&cp2) >= FIRST_ISO) + break; + if (!skip_ent(&bufp)) + bufp++; + continue; + } +#endif + /* End of modifications by Gerry Tierney */ + + /* check paren necessity... */ if (tflag) /* TeX or LaTeX stuff */ { @@ -329,6 +431,24 @@ lastboundary = NULL; for ( ; ; ) { +#ifdef HTSPECIAL + if (htmlflag == 1 && *bufp == '&') + { + char *cp2 = bufp; /* Avoid compiler complaints ... */ + /* .. about taking address of register bufp */ + if (html_ent(&cp2) < FIRST_ISO) + { + lastboundary = bufp; + bufp = cp2; + break; + } + else + { + lastboundary = NULL; + bufp = cp2; + } + } +#endif if (*bufp == '\0') { if (TeX_comment) @@ -389,7 +509,8 @@ if (hadlf) contextbufs[0][len] = 0; - if (!tflag) + /* Conditions modified by Gerry Tierney to handle html-mode */ + if (!tflag && htmlflag != 1) { /* skip over .if */ if (*currentchar == NRDOT @@ -426,7 +547,8 @@ /* if this is a formatter command, skip over it */ - if (!tflag && *currentchar == NRDOT) + /* Conditions modified by Gerry Tierney to handle html-mode */ + if (!tflag && htmlflag != 1 && *currentchar == NRDOT) { while (*currentchar && !myspace (chartoichar (*currentchar))) { @@ -441,10 +563,47 @@ return; } } + /* Start of modifications by Gerry Tierney */ + /* If we are checking a htmlfile and we have being left with + an open tag from a previous line, then we ignore everything + from the start of the line until we either reach the end of + the line or we close the tag */ + if (htmlflag == 1) + { + if (insidehtml == 1) + while (*currentchar != '>' && *currentchar != '\0') + { + /* We check for an alt attribute (found inside img + tags). We want to spell check it's text so if + we find one, we switch out html-mode until we + find the next quote character. We signal this + state by setting the insidehtml flag to -1 */ + if (strncasecmp(currentchar,"alt=\"",5) == 0) + { + copyout(¤tchar,5); + insidehtml = -1; + break; + } + (void) putc (*currentchar, ofile); + currentchar++; + } + else if (insidehtml == 2) /* filtering javascript */ + currentchar = skiptag(currentchar, "</script>", 9); + else if (insidehtml == 3) /* filtering comments */ + currentchar = skiptag(currentchar, "-->", 3); + else if (*currentchar == '>') + /* We've closed the tag so we reset the flag */ + insidehtml = 0; + } + /* End of modifications by Gerry Tierney */ + for ( ; ; ) { p = skiptoword (currentchar); +#ifdef HTSPECIAL + ctok_start = p; +#endif if (p != currentchar) copyout (¤tchar, p - currentchar); @@ -453,6 +612,23 @@ p = ctoken; endp = skipoverword (currentchar); +#ifdef HTSPECIAL + if (htmlflag == 1) /* We are honoring the ISO-HTML entities, */ + { /* and have to convert to ISO before lookup */ + while (currentchar < endp && p < ctoken + sizeof ctoken - 1) + { + if (*currentchar == '&') + { + *p++ = html_ent(¤tchar); + if (currentchar > endp) + currentchar = endp; + } + else + *p++ = *currentchar++; + } + } + else +#endif while (currentchar < endp && p < ctoken + sizeof ctoken - 1) *p++ = *currentchar++; *p = 0; @@ -545,6 +721,11 @@ } } if (!aflag && !lflag) +#ifdef HTSPECIAL + if (htmlflag == 1) /* Translate into output file */ + (void) fprintf (ofile, "%s", htmlword(ctoken)); + else +#endif (void) fprintf (ofile, "%s", ctoken); } @@ -899,3 +1080,178 @@ } return cmpresult; } + + +#ifdef HTSPECIAL + +/* + * Code to convert from / to ISO HTML-entities. + * + * Decoding of alphabetic entities is performed by two table lookups, + * one for each of the first two characters after the `&'. + * + * The first lookup decides which string to use for the second lookup, + * and if both match, the corresponding position in the isochar array + * holds the character value. + * + * After the 8-bit value is determined, the input is verified against + * the iso_ent array, using strncmp(), and in case of mismatch the + * function returns the input character unconverted. + * + * The alternate numeric form of &#nnn; is also decoded by atoi, and + * checked for sanity, but will be converted to the name-form on output. + * + * Encoding is performed by htmlword on characters between FIRST_ISO and 255, + * and the iso_ent table must hold an entry for each. + */ +static char *Y_key = "ACEINOTUYsaceinotuy"; /* Primary key */ + +static char *X_key[] = { /* Secondary key: */ + "gacturE", + "c", + "gacuT", + "gacu", + "t", + "gactus", + "h", + "gacu", + "a", + "z", + "gacture", + "c", + "gacut", + "gacu", + "t", + "gactus", + "h", + "gacu", + "au" +}; + +static unsigned char *isochar[] = { /* 8-bit values of above table */ + "\300\301\302\303\304\305\306", + "\307", + "\310\311\312\313\320", + "\314\315\316\317", + "\321", + "\322\323\324\325\326\330", + "\336", + "\331\332\333\334", + "\335", + "\337", + "\340\341\342\343\344\345\346", + "\347", + "\350\351\352\353\360", + "\354\355\356\357", + "\361", + "\362\363\364\365\366\370", + "\376", + "\371\372\373\374", + "\375\377", +}; + +/* + * Reference: http://uts.cc.utexas.edu/~churchh/latin1.html + */ +char *iso_ent[] = { /* Valid HTML characters above 160 in numerical order */ + " ", "¡", "¢", "£", "¤", "¥", + "¦", "§", "¨", "©", "ª", "«", + "¬", "", "®", "¯", "°", "±", + "²", "³", "´", "µ", "¶", "·", + "¸", "¹", "º", "»", "¼", "½", + "¾", "¿", + "À", "Á", "Â", "Ã", "Ä", "Å", "Æ", + "Ç", + "È", "É", "Ê", "Ë", + "Ì", "Í", "Î", "Ï", + "Ð", "Ñ", + "Ò", "Ó", "Ô", "Õ", "Ö", "×", "Ø", + "Ù", "Ú", "Û", "Ü", + "Ý", "Þ", "ß", + "à", "á", "â", "ã", "ä", "å", "æ", + "ç", + "è", "é", "ê", "ë", + "ì", "í", "î", "ï", + "ð", "ñ", + "ò", "ó", "ô", "õ", "ö", "÷", "ø", + "ù", "ú", "û", "ü", + "ý", "þ", "Ÿ" +}; + +/* Increment pointer past ignored entity, returning nonzero on success + */ +int skip_ent(char **entity) +{ + char **cpp; + int j, match = 0; + + if (strncmp(*entity, "<" , 4) == 0 || strncmp(*entity, ">" , 4) == 0) + match = 3; + else if (strncmp(*entity, "&" , 5) == 0) + match = 4; + else if (strncmp(*entity, """ , 6) == 0) + match = 5; + else if (strncmp(*entity, " " , 6) == 0) + match = 5; + if (match) + *entity += match; + return(match); +} + +/* Return 8-bit value of valid html-entity pointed to by *in, incrementing + * the pointer by the length of the tag. + * Only the first two alpha characters after '&' is tested, then the + * decoded char is verified against the iso_ent array. + */ +int html_ent(char **in) + { + char *cp, *cp2, ch; + char *decoded; + int row, val, taglen = 1; + + cp = cp2 = *in; + val = ch = **in; + + if (*++cp && *cp == '#' && (*++cp == '1' || *cp == '2') && (val = atoi(cp))) + { + if (val > 255 || *++cp < '0' || *cp > '5' || *++cp < '0' || *cp > '9' || *++cp!=';') + val = 0; + else + taglen = 6; /* Validate numeric tag */ + } + else + { + if ((cp = index(Y_key, *++cp2)) && + (decoded = index(*(X_key+(row = cp-Y_key)), *++cp2)) && + (val = isochar[row] [ decoded - X_key[row]]) >= FIRST_ISO) + taglen = strlen(iso_ent[val - FIRST_ISO]); + if (val<FIRST_ISO || strncmp(iso_ent[val-FIRST_ISO], *in, taglen) != 0) + { + taglen = 1; + val = 0; /* Disqualify alphabetic tag */ + } + } + *in += taglen; + return(val ? val : ch); + } + +/* convert word with 8-bit chars in a locally stored, static string, + * to properly escaped html, returning a pointer to it. + */ +char *htmlword(unsigned char *source) + { + static char word[256]; + register char *cp, *ent; + + for (cp = word; *source && (cp - word < 255); ++source) + if (*source < FIRST_ISO) + *cp++ = *source; + else + for (ent = iso_ent[*source-FIRST_ISO]; *ent && (cp-word < 255); ++ent) + *cp++ = *ent; + *cp = '\0'; + return(word); + } + +#endif + --- ./ispell.1X Mon Jan 23 10:28:25 1995 +++ ../ispell-3.1/./ispell.1X Tue Jan 4 08:16:24 2000 @@ -38,6 +38,9 @@ .\" SUCH DAMAGE. .\" .\" $Log: ispell.1X,v $ +.\" +.\" Documentation for html-mode added by Gerry Tierney 10/14/1995 +.\" .\" Revision 1.80 1995/01/08 23:23:31 geoff .\" Document the new personal-dictionary behavior (dictionary named after .\" the hash file is preferred). @@ -110,6 +113,7 @@ .IP \fIcommon-flags\fP: .RB [ \-t ] .RB [ \-n ] +.RB [ \-h ] .RB [ \-b ] .RB [ \-x ] .RB [ \-B ] @@ -296,6 +300,8 @@ The input file is in TeX or LaTeX format. .IP \fB\-n\fR The input file is in nroff/troff format. +.IP \fB\-h\fR +The input file is in html format. .IP \fB\-b\fR Create a backup file by appending ".bak" to the name of the input file. @@ -337,8 +343,12 @@ .RB ( \-n ) or TeX/LaTeX .RB ( \-t ) -input mode. -(The default is controlled by the DEFTEXFLAG installation option.) +input mode (This does not work for html +.RB ( \-h ) +mode. However html-mode is assumed for any files with a ".html" +or ".htm" extension unless nroff/troff or TeX/LaTeX modes have +been explicitly defined). +(The default mode is controlled by the DEFTEXFLAG installation option.) TeX/LaTeX mode is also automatically selected if an input file has the extension ".tex", unless overridden by the .B \-n --- ./fields.c Tue Jan 25 10:31:54 1994 +++ ../ispell-3.1/./fields.c Tue Jan 4 08:23:15 2000 @@ -61,8 +61,10 @@ extern void free (); extern char * malloc (); extern char * realloc (); +#ifndef __STRING_H__ extern char * strchr (); extern int strlen (); +#endif /* __STRING_H__ */ /* * Read one line of the given file into a buffer, break it up into --- ./version.h Thu Oct 12 12:04:46 1995 +++ ../ispell-3.1/./version.h Tue Jan 4 08:27:26 2000 @@ -8,10 +8,13 @@ */ static char * Version_ID[] = { - "@(#) International Ispell Version 3.1.20 10/10/95", + "@(#) International Ispell Version 3.1.20 1995/10/10, sgipl1", "@(#) Copyright (c), 1983, by Pace Willisson", "@(#) International version Copyright (c) 1987, 1988, 1990-1995,", "@(#) by Geoff Kuenning, Granada Hills, CA. All rights reserved.", + "@(#)", + "@(#) Configured and built for SGI Freeware by Vince Levey", + "@(#) (http://reality.sgi.com/vincel/).", "@(#)", "@(#) Redistribution and use in source and binary forms, with or without", "@(#) modification, are permitted provided that the following conditions",